home *** CD-ROM | disk | FTP | other *** search
/ CD ROM Paradise Collection 4 / CD ROM Paradise Collection 4 1995 Nov.iso / program / swags_z.zip / STRINGS.SWG / 0060_BOOLEAN String Function.pas < prev    next >
Pascal/Delphi Source File  |  1993-10-28  |  27KB  |  405 lines

  1. Unit BoolPos;
  2.  
  3. {        Version 1.3.3.P.
  4.  
  5.         Requires Borland Turbo Pascal version 6.0 or later to compile.
  6.  
  7.         Author:  Bruce J. Lackore.  Created Friday, July 23, 1993.
  8.         Copyright (c) 1993 Bruce J. Lackore.  ALL RIGHTS RESERVED.
  9. }
  10.  
  11. {$IFDEF Test}
  12.         {$A+,B-,D+,F-,G-,I+,L+,O-,R+,S+,V-,X+}
  13. {$ELSE}
  14.         {$A+,B-,D-,F-,G-,I-,L-,O-,R-,S-,V-,X+}
  15. {$ENDIF}
  16.  
  17. {        This unit comprises a function capable of searching a string for multiple
  18.         occurences of substrings using Boolean operators.  In the search string,
  19.         Boolean operators And and Or are defined as follows:
  20.  
  21.                 & - And
  22.                 | - Or
  23.  
  24.         Parentheses are supported for doing multiple searches.  Search strings are
  25.         submitted as follows:
  26.  
  27.                 i.e. In the source string "The quick brown fox jumped over the lazy dog"
  28.                                         and the search is for the word blue and the words quick or fox,
  29.                                         the search string is entered as follows:
  30.  
  31.                                                 (blue&(quick|fox))
  32.  
  33.         The way the function is currently written, And (&) and Or (|) have the same
  34.         precedence level hence the above search string without parentheses would be
  35.         interpretted to be (blue&quick|fox):
  36.  
  37.                 blue And quick would be searched for first, the result Or'd with the
  38.                 results of the search for fox.
  39.  
  40.         Notice the difference in that (blue&(quick|fox)) is a False statement whilst
  41.         (blue&quick|fox) is True.
  42.  
  43.         The function will automatically scan for () pairs, adding the necessary )
  44.         at the end of the search string or ( at the beginning if required.
  45.  
  46.         The function will also search for (|, |), (& and &) symbols, these being
  47.         illegal.
  48.  
  49. }
  50.  
  51. {        Bug fixes:
  52.  
  53.                 10/04/1993:        Noticed that length of Src_str in function Next_CPos was
  54.                                                                 incorrectly calculated because of positioning of INC DI.
  55.                                                                 INC DI precedes the MOV CL,[ES:DI] causing the function to
  56.                                                                 consider the first character of Src_str to represent the
  57.                                                                 length rather than the actual length byte.  Fix is to move
  58.                                                                 the INC DI to the line following the MOV CL,[ES:DI].
  59. }
  60.  
  61. Interface
  62.  
  63. Function BPos(Srch_str, Src_str:  String;  Ignore_case:  Boolean):  Boolean;
  64.  
  65. {        This function accepts a source string and a search string as described above
  66.         and returns a Boolean value based on whether or not the parsed search
  67.         string was found.
  68. }
  69.  
  70. { ************************************************************************** }
  71.  
  72. Implementation
  73.  
  74. Const
  75.         Lt_pn:                                                                                Char = '(';
  76.         Rt_pn:                                                                                Char = ')';
  77.  
  78. Function Cnt_ch(Scan_char:  Char;  In_str:  String):  Byte;  Assembler;
  79.  
  80. {        This function will scan a string for occurences of a particular character.
  81.         The function will return the number of occurences.
  82. }
  83.  
  84.         Asm  { Function Cnt_ch }
  85.                                                         XOR                AX,AX                                        {        0 AX }
  86.                                                         MOV                BL,Scan_char  {        Put char to count in BL }
  87.                                                         LES                SI,In_str     {        Set ES:SI to point to start of string }
  88.                                                         XOR                CX,CX         {        0 CX }
  89.                                                         MOV                CL,[ES:SI]    {        Move string length to CX }
  90.                                                         ADD                SI,CX         {        Set ES:SI to point to END of string }
  91.                 @LOOK:                CMP                BL,[ES:SI]    {        Start Loop, compare current char and BL }
  92.                                                         JNE                @NEXT         {        If not equal, jump to end of loop }
  93.                                                         INC                AX            { If equal, Inc char cnt (AX) }
  94.                 @NEXT:                DEC                SI            {        Set ES:SI back one character }
  95.                                                         LOOP        @LOOK         {        Decrement CX and jump to start of loop }
  96.         End;  { Function Cnt_ch }
  97.  
  98. Function Fill_str(Dupe_ch:  Char;  How_many:  Byte):  String;  Assembler;
  99.  
  100. {        This function returns How_many of Dupe_char.
  101. }
  102.  
  103.         Asm  { Function Fill_str }
  104.                                                         LES                DI, @Result                {        Set ES:DI to function result area }
  105.                                                         CLD                 {        Clear direction flag }
  106.                                                         XOR         CH,CH         {        0 CH }
  107.                                                         MOV         CL,How_many          { Length in CX }
  108.                                                         MOV         AX,CX                { and in AX }
  109.                                                         STOSB                     { Store length byte }
  110.                                                         MOV         AL,Dupe_ch    {        Put char to dupe in AL }
  111.                                                         REP         STOSB         { Fill string with char }
  112.         End;  { Function Fill_str }
  113.  
  114. Function PosC(Srch_ch:  Char;  Src_str:  String):  Boolean;  Assembler;
  115.  
  116. {        This function is similar to the Pos function of Pascal except that it
  117.         accepts only a single character to search for.  This function returns a
  118.         True if a Srch_ch is encountered, a False if not.
  119. }
  120.  
  121.         Asm  { Function PosC }
  122.                                                         XOR                BX,BX                                        {        0 BX }
  123.                                                         MOV                AL,Srch_ch    {        Put char to look for in AL }
  124.                                                         LES                DI,Src_str    {        Set ES:DI to start of Src_str }
  125.                                                         XOR                CX,CX         {        0 CX }
  126.                                                         MOV                CL,[ES:DI]    {        Store length of Src_str in CL }
  127.                                                         ADD                DI,CX         {        Set ES:DI to end of string }
  128.                                                         STD                 {        Set direction flag }
  129.                 @LOOK:                REPNZ        SCASB         {        Look for AL in Src_str }
  130.                                                         JNZ                @DONE         {        If not found, jump to end (BX = 0) }
  131.                                                         INC                BX            {        If Found, Inc Bx  to 1 = Pascal True }
  132.                 @DONE:                MOV                AX,BX         {        Move BX to AX (return result) }
  133.         End;  { Function PosC }
  134.  
  135. Function Last_Cpos(Srch_ch:  Char;  Src_str:  String):  Byte;  Assembler;
  136.  
  137. {        This function performs the same function as the Pascal POS function except
  138.         that it works only with a single character and rather than returning the
  139.         first position the character is found in, it returns the LAST position that
  140.         the search character is found in.
  141. }
  142.  
  143.         Asm { Function Last_Cpos }
  144.                                                         MOV                AL,Srch_ch                {        Put char to look for in AL }
  145.                                                         LES                DI,Src_str    {        Set ES:DI to start of Src_str }
  146.                                                         XOR                CX,CX         {        0 CX }
  147.                                                         MOV                CL,[ES:DI]    {        Move length of Src_str to CL }
  148.                                                         ADD                DI,CX         {        Set ES:DI to end of Src_str }
  149.                                                         INC                CX            { Add one to CX (correct for string length }
  150.                                                         STD                 {        Set direction flag }
  151.                                                         REPNZ        SCASB         {        Look for character in string }
  152.                                                         MOV                AX,CX         { If found CX indicates position, else 0 }
  153.         End;  { Function Last_Cpos }
  154.  
  155. Function Next_CPos
  156.         (Srch_ch:  Char;  Src_str:  String;  Strt_at:  Byte):  Byte;  Assembler;
  157.  
  158. {        This function searches for the next occurence of Srch_ch in Src_str AFTER
  159.         position Strt_at.  The function returns the offset from the beginning of
  160.         the string, NOT the offset from Strt_at.
  161. }
  162.  
  163.         Asm  { Function Next_CPos }
  164.                                                         XOR                AX,AX         {        0 AX }
  165.                                                         MOV                AL,Strt_at    {        Move position to start at to AL }
  166.                                                         LES                DI,Src_str    {        Set ES:DI to start of Src_str }
  167.                                                         XOR                CX,CX         {        0 CX }
  168.                                                         MOV                CL,[ES:DI]    {        Store length of Src_str in CL }
  169.                                                         INC                DI            {        Set ES:DI to first char of Src_str }
  170.                                                         MOV                BX,CX         {        Move CX to BX }
  171.                                                         SUB                CX,AX         {        Set CX to length of string after Strt_at }
  172.                                                         ADD                DI,AX         {        Set ES:DI to char at Strt_at in Src_str }
  173.                                                         MOV                AL,Srch_ch    {        Move Srch_ch to AL }
  174.                                                         CLD                 {        Clear direction flag }
  175.                                                         REPNZ        SCASB         {        Look for character following Strt_at }
  176.                                                         JNZ                @NOTFND       {        If not found, jump to end of procedure }
  177.                                                         SUB                BX,CX         {        Set BX to position char found in }
  178.                                                         JMP                @DONE         {        Jump to end of procedure }
  179.                 @NOTFND:        XOR                BX,BX         {        Srch_ch not found, set BX to 0 }
  180.                 @DONE:                MOV                AX,BX         {        Move position found at (BX) to AX }
  181.         End;  { Function Next_CPos }
  182.  
  183. Function Up_cs(In_str:  String):  String;
  184.  
  185. {        This function converts In_str to all upper case characters.
  186. }
  187.  
  188.         Begin  { Function Up_cs }
  189.                 Inline(
  190.                         $1E/                                                                {                                        PUSH DS  }
  191.                         $C4/$7E/$0A/                                {                                        LES         DI,[BP+$0A]  }
  192.                         $C5/$76/$06/                                {                                        LDS         SI,[BP+$06]  }
  193.                         $30/$E4/                                                {                                        XOR         AH,AH  }
  194.                         $AC/                                                                {                                        LODSB  }
  195.                         $AA/                                                                {                                        STOSB  }
  196.                         $89/$C1/                                                {                                        MOV         CX,AX  }
  197.                         $E3/$0F/                                                {                                        JCXZ DONE  }
  198.                         $FC/                                                                {                                        CLD  }
  199.                         $AC/                                                                {DOCHAR:        LODSB  }
  200.                         $3C/$61/                                                {                                        CMP         AL,'a'  }
  201.                         $72/$06/                                                {                                        JB         NEXTCH  }
  202.                         $3C/$7A/                                                {                                        CMP         AL,'z'  }
  203.                         $77/$02/                                                {                                        JA         NEXTCH  }
  204.                         $24/$DF/                                                {                                        AND         AL,$DF  }
  205.                         $AA/                                                                {NEXTCH:        STOSB  }
  206.                         $E2/$F2/                                                {                                        LOOP DOCHAR  }
  207.                         $1F)                                                                {DONE:                POP         DS  }
  208.         End;  { Function Up_cs }
  209.  
  210. Function Fixup_srch_str(Srch_str:  String):  String;
  211.  
  212. {        This functions sole purpose in life is to count the number of parantheses
  213.         pairs and correct for a deficient number of either by adding the appropriate
  214.         character either at the beginning or the end of the search string.  This
  215.         may not yield the correct result as the searcher intended but is a
  216.         requirement of the algorithm (it searches for paran pairs).  Note that the
  217.         function will add one set of parantheses if none are found.  This function
  218.         also looks for illegal character pairs (&, &), (| and |), these pairs
  219.         indicate an illegal Boolean search.  The function returns the corrected
  220.         Srch_str if all is well, an empty string if not.
  221. }
  222.  
  223.         Var
  224.                 Left_para,
  225.                 Right_para,
  226.                 How_many:                                                                Integer;
  227.  
  228.         Begin  { Function Fixup_srch_str }
  229.                 Left_para         := Cnt_ch(Lt_pn, Srch_str);                                        {        Count the parens }
  230.                 Right_para         := Cnt_ch(Rt_pn, Srch_str);
  231.                 How_many                 := Abs(Left_para - Right_para);     { Get the difference }
  232.                 If How_many > 0 Then
  233.                         If Right_para < Left_para Then
  234.                                 Srch_str := Srch_str + Fill_str(Rt_pn, How_many)
  235.                         Else
  236.                                 Srch_str := Fill_str(Lt_pn, How_many) + Srch_str
  237.                 Else
  238.                         If (Srch_str[1] <> Lt_pn) Or                                                                        { No parens?  Add 'em }
  239.                                 (Srch_str[Ord(Srch_str[0])] <> Rt_pn) Then
  240.                                         Srch_str := Lt_pn + Srch_str + Rt_pn;
  241.                 If (Pos(Lt_pn + '&', Srch_str) <> 0) Or         { Illegal call? }
  242.                         (Pos('&' + Rt_pn, Srch_str) <> 0) Or
  243.                         (Pos(Lt_pn + '|', Srch_str) <> 0) Or
  244.                         (Pos('|' + Rt_pn, Srch_str) <> 0) Then
  245.                                 Fixup_srch_str := ''
  246.                 Else
  247.                         Fixup_srch_str := Srch_str                    { All is well }
  248.         End;  { Function Fixup_srch_str }
  249.  
  250. Function Parse_srch_str(Srch_str, Src_str:  String):  String;
  251.  
  252. {        This function simply extracts each string to search for, tests to see if
  253.         it exists in the original string and replaces the extracted substring with
  254.         the appropriate token.  It should be noted that each substring is determined
  255.         solely by the characters used for parantheses.  Any other characters are
  256.         assumed to be part of the search string (except the & and | operators).
  257.  
  258.         Each substring is searched for in the original Search_str and its presense
  259.         or absense noted with a T or F respectively.
  260. }
  261.  
  262.         Var
  263.                 Rtn_str,
  264.                 Token_str:                                                        String;
  265.                 End_token:                                                        Boolean;
  266.  
  267.         Begin  { Function Parse_srch_str }
  268.                 Token_str         := '';
  269.                 Rtn_str                        := '';
  270.                 While Srch_str <> '' Do
  271.                         Begin
  272.                                 If (Srch_str[1] In [Lt_pn, Rt_pn, '&', '|']) Then { Token starts? }
  273.                                         Begin
  274.                                                 End_token := (Token_str <> '');       { End of token?  If not }
  275.                                                 If Not(End_token) Then                { then start one.       }
  276.                                                         Rtn_str := Rtn_str + Srch_str[1]
  277.                                         End
  278.                                 Else
  279.                                         Begin
  280.                                                 Token_str := Token_str + Srch_str[1]; { Add a char to substring }
  281.                                                 End_token        := False
  282.                                         End;
  283.                                 If End_token Then                         { If complete token, look }
  284.                                         Begin                                   { for it in the source str }
  285.                                                 If Pos(Token_str, Src_str) <> 0 Then
  286.                                                         Rtn_str := Rtn_str + 'T'            { If found, return T }
  287.                                                 Else
  288.                                                         Rtn_str := Rtn_str + 'F';           { If not, return F   }
  289.                                                 Rtn_str         := Rtn_str + Srch_str[1];
  290.                                                 Token_str := '';                      { Reset to look for more }
  291.                                                 End_token        := False
  292.                                         End;  { If End_token }
  293.                                 Delete(Srch_str, 1, 1)                    { Delete the char just
  294.                                                                                                                                                                                                                 processed and start again
  295.                                                                                                                                                                                                         }
  296.                         End;  { While Srch_str <> '' }
  297.                 Parse_srch_str := Rtn_str
  298.         End;  { Function Parse_srch_str }
  299.  
  300. Function Process_token_str(Token_str:  String):  Char;
  301.  
  302.         Var
  303.                 One_token:                                                        String;
  304.                 One_token_len,
  305.                 Left_para:                                                        Byte;
  306.  
  307.         Function Process_one_token_str(The_token:  String):  Char;
  308.  
  309.                 Var
  310.                         Lcv:                                                                        Byte;
  311.                         Curr_answer,
  312.                         Do_and:                                                                Boolean;
  313.  
  314.                 Begin  { Function Process_one_token_str }
  315.                         Curr_answer := (The_token[1] = 'T');      { Establish current answer
  316.                                                                                                                                                                                                         by checking first token.
  317.                                                                                                                                                                                                 }
  318.                         For Lcv := 2 to Length(The_token) Do      { Look at the rest of the
  319.                                                                                                                                                                                                         token str.
  320.                                                                                                                                                                                                 }
  321.                                 Case The_token[Lcv] of                  { Boolean op is And }
  322.                                         '&':        Do_and := True;                 { Boolean op is Or }
  323.                                         '|':        Do_and := False;
  324.                                         'T':        If Do_and Then
  325.                                                                         Curr_answer := Curr_answer And True  { If And }
  326.                                                                 Else
  327.                                                                         Curr_answer := True;                 { If Or }
  328.                                         'F':        If Do_and Then                         { If And (Or stays T) }
  329.                                                                         Curr_answer := False;
  330.                                 End;  { Case }
  331.                         If Curr_answer Then                      { Final result }
  332.                                 Process_one_token_str := 'T'
  333.                         Else
  334.                                 Process_one_token_str        := 'F'
  335.                 End;  { Function Process_one_token_str }
  336.  
  337.         Begin  { Function Process_token_str }
  338.  
  339.                 { Are parens present?  If so process as tokenized phrase, if not, final
  340.                         result has been received.
  341.                 }
  342.  
  343.                 If PosC(Lt_pn, Token_str) Then
  344.                         Begin
  345.                                 While Length(Token_str) > 1 Do
  346.                                         Begin
  347.  
  348.                                                 { Find leftmost left paren }
  349.  
  350.                                                 Left_para                 := Last_Cpos(Lt_pn, Token_str);
  351.  
  352.                                                 { Find first right paren after leftmost left paren }
  353.  
  354.                                                 One_token_len :=
  355.                                                         Succ(Next_CPos(Rt_pn, Token_str, Left_para) - Left_para);
  356.  
  357.                                                 { Copy everything between the two }
  358.  
  359.                                                 One_token := Copy(Token_str, Left_para, One_token_len);
  360.  
  361.                                                 { Remove the parens }
  362.  
  363.                                                 Delete(One_token, 1, 1);
  364.                                                 Dec(One_token[0]);
  365.  
  366.                                                 { Remove the original substring from the phrase }
  367.  
  368.                                                 Delete(Token_str, Left_para, One_token_len);
  369.  
  370.                                                 { Insert the resultant single character in place of the old
  371.                                                         substring.
  372.                                                 }
  373.  
  374.                                                 Insert(Process_one_token_str(One_token), Token_str, Left_para)
  375.                                         End;  { While Length(Token_str) > 1 }
  376.                                 Process_token_str := Token_str[1]
  377.                         End
  378.                 Else
  379.                         Process_token_str := Process_one_token_str(One_token)
  380.         End;  { Function Process_token_str }
  381.  
  382. Function BPos;
  383.  
  384.         Begin  { Function BPos }
  385.                 If Ignore_case Then
  386.                         Begin
  387.                                 Srch_str         := Up_cs(Srch_str);
  388.                                 Src_str   := Up_cs(Src_str)
  389.                         End;  { If Ignore_case }
  390.  
  391.                 {        Is this a Boolean expression?  If so process with this function, else
  392.                         process with Pascal POS function.
  393.                 }
  394.  
  395.                 If PosC('|', Srch_str) Or PosC('&', Srch_str) Then
  396.                         Begin
  397.                                 Srch_str := Parse_srch_str(Fixup_srch_str(Srch_str), Src_str);
  398.                                 If Srch_str <> '' Then
  399.                                         BPos := (Process_token_str(Srch_str) = 'T')
  400.                         End
  401.                 Else
  402.                         BPos := Pos(Srch_str, Src_str) <> 0
  403.         End;  { Function BPos }
  404.  
  405. End.  { Unit BoolPos }